library(twitteR)
## Warning: package 'twitteR' was built under R version 4.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
##
## lookup_statuses
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(RColorBrewer)
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Twitter API credentials are read from environment variables (for example
# set in ~/.Renviron) rather than being written into the script, so they never
# end up in version control or in a rendered report.
# NOTE(review): any credentials that were previously embedded in this file
# should be treated as exposed and regenerated in the Twitter developer portal.
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for an unset variable; fail early with a clear
# message instead of sending empty credentials to the API.
if (!all(nzchar(c(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_SECRET)))) {
  stop(
    "Twitter API credentials are not set. Define TWITTER_CONSUMER_KEY, ",
    "TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET.",
    call. = FALSE
  )
}

# Register the four credentials with twitteR for the rest of the session.
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"
Extract 10,000 tweets (including retweets) from Twitter using the twitteR package
# Search Twitter for "NBA": request up to 10000 English-language tweets within
# the since/until date window, with retryOnRateLimit = 120 so the search
# retries when the API rate limit is hit.
NBAtweets <- searchTwitter(
  searchString = "NBA",
  n = 10000,
  since = "2022-12-05",
  until = "2022-12-11",
  lang = "en",
  retryOnRateLimit = 120
)
Convert into Data Frame
# Convert the list returned by searchTwitter() into a data frame.
NBAdf <- twListToDF(NBAtweets)
# Preview the first five tweet texts. head(x, 5) asks for five directly; the
# earlier head(x)[1:5] took six elements and re-indexed them, which pads the
# result with NA when fewer than five tweets are available.
head(NBAdf$text, 5)
## [1] "@EBomb_NBA I tend to agree.\n\nIDK who they could get. I'd want a win now trade.\n\nSomething like KAT for OG plus"
## [2] "RT @NBA: An updated look at the NBA Standings 👀\n\nFor more, download the NBA App\n🏀 https://t.co/6FlAli0aPP https://t.co/o7BJTmCmCS"
## [3] "@NBA better than curry"
## [4] "Pistons news: Jalen Duren and some big problems for Detroit @NBA https://t.co/qmxi9uHewa"
## [5] "RT @NBA: Big DUNKS from LUKA & ZION headline Friday's TOP 10 PLAYS! https://t.co/wOKl1KIiTH"
Saving and loading the Data Frame
# Write the NBAdf object to disk, then read it back into the workspace.
save(list = "NBAdf", file = "NBAdf.Rdata")
load("NBAdf.Rdata")
---- Original Tweets ----
Subsetting original tweets
# Keep only the rows that are not retweets, restricted to the four columns
# used in the rest of the analysis.
tweetsNBA <- NBAdf %>%
  filter(!isRetweet) %>%
  select(screenName, text, created, isRetweet)
Saving and loading Original Tweets
# Write the tweetsNBA object to disk, then read it back into the workspace.
save(list = "tweetsNBA", file = "Original_Tweetsdf.Rdata")
load("Original_Tweetsdf.Rdata")
Finding the earliest and latest creation times of the original tweets
# Latest and earliest `created` timestamps among the original tweets.
# Grouping on the constant 1 puts every row in a single group, so the summary
# has one row plus a grouping column named `1`.
tweetsNBA %>%
  group_by(`1` = 1) %>%
  summarise(
    max = max(created),
    min = min(created)
  )
## # A tibble: 1 × 3
## `1` max min
## <dbl> <dttm> <dttm>
## 1 1 2022-12-10 23:59:59 2022-12-10 21:21:02
# Add Created_At_Round: each tweet's `created` timestamp rounded to the hour
# and converted back to POSIXct.
data1 <- mutate(
  tweetsNBA,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)
# Earliest creation time among the original tweets (printed below).
mn <- min(tweetsNBA$created)
mn
## [1] "2022-12-10 21:21:02 UTC"
# Latest creation time among the original tweets (printed below).
mx <- max(tweetsNBA$created)
mx
## [1] "2022-12-10 23:59:59 UTC"
Plot of original tweets over time.
# Histogram of original tweets over the rounded creation time, with each bar
# filled according to its count.
# after_stat(count) replaces the `..count..` notation, which is deprecated
# since ggplot2 3.4.0. bins = 30 states the default explicitly, so the figure
# is unchanged and the "Pick better value" message is no longer emitted.
Orig_plot <- ggplot(data1, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of Tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")
# Convert the static ggplot into an interactive plotly figure.
Orig_plot %>% ggplotly()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## ℹ The deprecated feature was likely used in the ggplot2 package.
## Please report the issue at <https://github.com/tidyverse/ggplot2/issues>.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
---- Retweets ----
Subsetting retweets
# Keep only the rows that are retweets, restricted to the four columns used
# in the rest of the analysis.
NBA_retweets <- NBAdf %>%
  filter(isRetweet) %>%
  select(screenName, text, created, isRetweet)
Saving and loading ReTweets
# Write the NBA_retweets object to disk, then read it back into the workspace.
save(list = "NBA_retweets", file = "ReTweetsdf.Rdata")
load("ReTweetsdf.Rdata")
Finding the earliest and latest creation times of the retweets
# Latest and earliest `created` timestamps among the retweets.
# Grouping on the constant 1 puts every row in a single group, so the summary
# has one row plus a grouping column named `1`.
NBA_retweets %>%
  group_by(`1` = 1) %>%
  summarise(
    max = max(created),
    min = min(created)
  )
## # A tibble: 1 × 3
## `1` max min
## <dbl> <dttm> <dttm>
## 1 1 2022-12-10 23:59:59 2022-12-10 21:21:03
# Add Created_At_Round: each retweet's `created` timestamp rounded to the hour
# and converted back to POSIXct.
data2 <- mutate(
  NBA_retweets,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)
# Earliest creation time among the retweets (printed below).
mn <- min(NBA_retweets$created)
mn
## [1] "2022-12-10 21:21:03 UTC"
# Latest creation time among the retweets (printed below).
mx <- max(NBA_retweets$created)
mx
## [1] "2022-12-10 23:59:59 UTC"
Plot of retweets over time
# Histogram of retweets over the rounded creation time, with each bar filled
# according to its count.
# after_stat(count) replaces the `..count..` notation, which is deprecated
# since ggplot2 3.4.0. bins = 30 states the default explicitly, so the figure
# is unchanged and the "Pick better value" message is no longer emitted.
reTweet_plot <- ggplot(data2, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of ReTweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")
# Convert the static ggplot into an interactive plotly figure.
reTweet_plot %>% ggplotly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.